Introduction

In this script, the relative fraction of different cell types is evaluated between survival groups in the different anatomical areas.

library(tidyverse)
library(ggpubr)
library(rstatix)
library(survival)
library(pheatmap)

wd <- file.path(getwd(), '..')

Load data

df_data <- read.csv2(file.path(wd, 'input_data', 'expression_data.csv'), stringsAsFactors = FALSE)
df_annotations <- read.csv2(file.path(wd, 'input_data', 'cell_annotations.csv'), stringsAsFactors = FALSE)
df_meta <- read.csv2(file.path(wd, 'input_data', 'clinical_data.csv'), stringsAsFactors = FALSE)
df_areas <- read.csv2(file.path(wd, 'intermediate_results', 'anatomical_areas.csv'), stringsAsFactors = FALSE)

Immune fraction

Relative fraction of immune cells across regions

round_base <- function(x, base = 50) {
  return(round(x / base) * base)
}

# First we make the squares of 50*50 microns.
df_data <- df_data %>% 
  mutate(X2 = round_base(X, 50), Y2 = round_base(Y, 50))

level_data <- df_annotations %>% 
  filter(level == 'first_level') %>% 
  left_join(df_data %>% select(slide_id, scene_id, OID, X, Y, X2, Y2)) %>% 
  left_join(df_meta) %>% 
  left_join(df_areas)

immune_cells <- c("MF", "MG", "Tcyt", "Tdn", "B cell", "DC", "Monocyte", "MDSC", "Treg", "Th")

df_landscape <- level_data %>% 
  mutate(CellType = ifelse(CellType %in% immune_cells, 'immune', 'other')) %>% 
  group_by(survival_group, couple_id, patient_id, region, CellType) %>% 
  summarise(N = n()) %>% 
  ungroup() %>% 
  spread(CellType, N, fill = 0) %>% 
  gather(CellType, N, -survival_group, -couple_id, -patient_id, -region) %>% 
  group_by(survival_group, patient_id, couple_id, region) %>% 
  mutate(M = sum(N)) %>% 
  ungroup() %>% 
  mutate(P = 100*N/M)

ggplot(df_landscape, aes(paste(survival_group, patient_id), P, fill = CellType)) + geom_col() + theme_bw() + facet_grid(region~couple_id, scales = 'free_x', space = 'free_x') + theme_bw() + theme(legend.position = 'bottom') + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylab('Cell Percentage (%)') + xlab('patient identifier') + ggtitle('immune level')

stat.test <- df_landscape %>% 
  mutate(survival_group = survival_group == 'eLTS') %>% 
  group_by(region, CellType) %>%
  summarise(average_eLTS = mean(P[survival_group == TRUE]), average_STS = mean(P[survival_group == FALSE]), pVal = summary(clogit(survival_group ~ P + strata(couple_id)))$coefficients[5]) %>% 
  ungroup()

DT::datatable(stat.test, filter = 'top')
ggboxplot(df_landscape, x = "survival_group", y = "P", fill = "survival_group") + 
  theme_bw() + 
  geom_jitter(alpha = 0.2) +
  scale_fill_manual(values = c('royalblue', 'goldenrod')) +
  theme(legend.position = 'none') +
  xlab(NULL) + ylab('Relative Cell Fraction (%)') +
  facet_grid(CellType~region, scales = 'free_y')

tmp_plot <- df_landscape %>% 
  group_by(couple_id, survival_group, CellType, region) %>% 
  summarise(P = mean(P)) %>% 
  ungroup() %>% 
  mutate(survival_group = plyr::mapvalues(survival_group, from = c('STS', 'eLTS'), to = c(1, 2)))

b <- runif(nrow(tmp_plot), -0.1, 0.1)

ggplot(tmp_plot) +
  geom_boxplot(aes(x = as.numeric(survival_group), y = P, group = survival_group, fill = survival_group)) +
  geom_point(aes(x = as.numeric(survival_group) + b, y = P), alpha = 0.2) +
  geom_line(aes(x  = as.numeric(survival_group) + b, y = P, group = couple_id), alpha = 0.2) +
  scale_x_continuous(breaks = c(1,2), labels = c("STS", "eLTS"))+
  xlab("survival_group") + facet_grid(CellType~region, scales = 'free_y') + 
  scale_fill_manual(values = c('royalblue', 'goldenrod')) +
  theme_bw()

tmp_plot <- tmp_plot %>% 
  mutate(survival_group = plyr::mapvalues(survival_group, from = c(1,2), to = c('STS', 'eLTS'))) %>%
  group_by(couple_id) %>% 
  mutate(QC = n_distinct(survival_group)) %>% 
  ungroup() %>% 
  filter(QC == max(QC)) %>% 
  select(-QC) %>% 
  spread(survival_group, P, fill = 0) %>% 
  mutate(delta = eLTS-STS) %>% 
  group_by(CellType, region) %>% 
  arrange(-delta) %>% 
  mutate(N = 1:n()) %>% 
  ungroup() %>% 
  group_by(CellType, region) %>% 
  mutate(label_col = delta/max(abs(delta))) %>% 
  ungroup()

ggplot(tmp_plot, aes(N, delta, fill = label_col)) + 
  geom_col() + 
  theme_bw() + 
  facet_grid(CellType~region, scales = 'free') + 
  theme_bw() + 
  xlab('patient ranking') +
  scale_fill_gradient2(low = 'royalblue', high = 'firebrick', mid = 'white', midpoint = 0)

tmp_plot <- tmp_plot %>% 
  mutate(ID = paste(CellType, region, sep = '_')) %>% 
  select(couple_id, ID, delta) %>% 
  spread(ID, delta, fill = 0)

tmp_matrix <- tmp_plot %>% select(-couple_id) %>% as.matrix()
rownames(tmp_matrix) <- tmp_plot$couple_id

pheatmap(tmp_matrix)

norm_matrix <- scale(tmp_matrix)
norm_matrix[is.na(norm_matrix)] <- 0
pheatmap(norm_matrix)

All levels

We are going to iterate through all the levels and do the analysis per level per region. At each level we compare relative expression, for example: - MF relative to all cells - M2 MF relative to all MF

for(i in unique(df_annotations$level)){
  print(i)
  level_data <- df_annotations %>% 
    filter(level == i) %>% 
    left_join(df_data %>% select(slide_id, scene_id, OID, X, Y, X2, Y2)) %>% 
    left_join(df_meta) %>% 
    left_join(df_areas)
  
  df_landscape <- level_data %>% 
    group_by(survival_group, couple_id, patient_id, region, CellType) %>% 
    summarise(N = n()) %>% 
    ungroup() %>% 
    spread(CellType, N, fill = 0) %>% 
    gather(CellType, N, -survival_group, -couple_id, -patient_id, -region) %>% 
    group_by(survival_group, patient_id, couple_id, region) %>% 
    mutate(M = sum(N)) %>% 
    ungroup() %>% 
    mutate(P = 100*N/M)
  
  print(ggplot(df_landscape, aes(paste(survival_group, patient_id), P, fill = CellType)) + geom_col() + theme_bw() + facet_grid(region~couple_id, scales = 'free_x', space = 'free_x') + theme_bw() + theme(legend.position = 'bottom') + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylab('Cell Percentage (%)') + xlab('patient identifier') + ggtitle(i))
  
  if(i == 'first_level'){
    tmp_plot <- df_landscape %>% 
      mutate(couple_id = plyr::mapvalues(couple_id, 
                                         from = c('B', 'C', 'E', 'F', 'G', 'I', 'J', 'K', 'L', 'M', 'N', 'P'),
                                         to = paste0('P', str_pad(c(1:n_distinct(couple_id)), width = 2, pad = '0'))))
    
    print(ggplot(tmp_plot, aes(paste(survival_group, patient_id), P, fill = CellType)) + 
            geom_col() + 
            theme_bw() + 
            facet_grid(region~couple_id, scales = 'free_x', space = 'free_x') + 
            theme_bw() + 
            theme(legend.position = 'bottom') + 
            theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + 
            ylab('Cell Percentage (%)') + 
            xlab('Patient') + 
            ggtitle(i) +
            scale_fill_manual(values = c('B cell'= rgb(1, 0.64, 0), 'NOS' = rgb(0.33, 0.33, 0.33), 'Tcyt' = rgb(0, 0.2, 0.4), 'DC' = rgb(0.51, 0.26, 0), 'MDSC' = rgb(1, 0.71, 0.76), 'Monocyte' = rgb(0.3, 1, 0.3), 'Th' = rgb(0.15, 0.4, 0.8), 'Treg' = rgb(0.3, 0.56, 1), 'Tumor' = rgb(1, 0, 0), 'MF' = rgb(0.1, 0.5, 0.1), 'MG' = rgb(0.25, 0.7, 0.25), 'Tdn' = rgb(0, 0, 0.2))) + scale_x_discrete(labels = function(x) gsub(" .*", "", x)))
    
    df_landscape <- df_landscape %>% 
      filter(CellType != 'NOS') %>% 
      group_by(survival_group, patient_id, couple_id, region) %>% 
      mutate(M = sum(N)) %>% 
      ungroup() %>% 
      mutate(P = 100*N/M)
    
    tmp_plot <- df_landscape %>% 
      mutate(couple_id = plyr::mapvalues(couple_id, 
                                         from = c('B', 'C', 'E', 'F', 'G', 'I', 'J', 'K', 'L', 'M', 'N', 'P'),
                                         to = paste0('P', str_pad(c(1:n_distinct(couple_id)), width = 2, pad = '0'))))
    
    print(ggplot(tmp_plot, aes(paste(survival_group, patient_id), P, fill = CellType)) + 
            geom_col() + 
            theme_bw() + 
            facet_grid(region~couple_id, scales = 'free_x', space = 'free_x') + 
            theme_bw() + 
            theme(legend.position = 'bottom') + 
            theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + 
            ylab('Cell Percentage (%)') + 
            xlab('Patient') + 
            ggtitle(i) +
            scale_fill_manual(values = c('B cell'= rgb(1, 0.64, 0), 'Tcyt' = rgb(0, 0.2, 0.4), 'DC' = rgb(0.51, 0.26, 0), 'MDSC' = rgb(1, 0.71, 0.76), 'Monocyte' = rgb(0.3, 1, 0.3), 'Th' = rgb(0.15, 0.4, 0.8), 'Treg' = rgb(0.3, 0.56, 1), 'Tumor' = rgb(1, 0, 0), 'MF' = rgb(0.1, 0.5, 0.1), 'MG' = rgb(0.25, 0.7, 0.25), 'Tdn' = rgb(0, 0, 0.2))) + scale_x_discrete(labels = function(x) gsub(" .*", "", x)))
    
  }
  
  stat.test <- df_landscape %>% 
    mutate(survival_group = survival_group == 'eLTS') %>% 
    group_by(region, CellType) %>%
    summarise(average_eLTS = mean(P[survival_group == TRUE]), average_STS = mean(P[survival_group == FALSE]), pVal = summary(clogit(survival_group ~ P + strata(couple_id)))$coefficients[5]) %>% 
    ungroup()
  
  DT::datatable(stat.test, filter = 'top')
  
  print(ggboxplot(df_landscape, x = "survival_group", y = "P", fill = "survival_group") + #, palette = "jco", # ylim = c(0, 40)
          theme_bw() + 
          geom_jitter(alpha = 0.2) +
          scale_fill_manual(values = c('royalblue', 'goldenrod')) +
          theme(legend.position = 'none') +
          xlab(NULL) + ylab('Relative Cell Fraction (%)') +
          facet_grid(CellType~region, scales = 'free_y') + 
          ggtitle(i))
  
  # Deltas
  
  tmp_plot <- df_landscape %>% 
    group_by(couple_id, survival_group, CellType, region) %>% 
    summarise(P = mean(P)) %>% 
    ungroup() %>% 
    mutate(survival_group = plyr::mapvalues(survival_group, from = c('STS', 'eLTS'), to = c(1, 2)))
  
  b <- runif(nrow(tmp_plot), -0.1, 0.1)
  
  print(ggplot(tmp_plot) +
          geom_boxplot(aes(x = as.numeric(survival_group), y = P, group = survival_group, fill = survival_group)) +
          geom_point(aes(x = as.numeric(survival_group) + b, y = P), alpha = 0.2) +
          geom_line(aes(x  = as.numeric(survival_group) + b, y = P, group = couple_id), alpha = 0.2) +
          scale_x_continuous(breaks = c(1,2), labels = c("STS", "eLTS"))+
          xlab("survival_group") + facet_grid(CellType~region, scales = 'free_y') + 
          scale_fill_manual(values = c('royalblue', 'goldenrod')) +
          theme_bw())
  
  tmp_plot <- tmp_plot %>% 
    mutate(survival_group = plyr::mapvalues(survival_group, from = c(1,2), to = c('STS', 'eLTS'))) %>%
    group_by(couple_id) %>% 
    mutate(QC = n_distinct(survival_group)) %>% 
    ungroup() %>% 
    filter(QC == max(QC)) %>% 
    select(-QC) %>% 
    spread(survival_group, P, fill = 0) %>% 
    mutate(delta = eLTS-STS) %>% 
    group_by(CellType, region) %>% 
    arrange(-delta) %>% 
    mutate(N = 1:n()) %>% 
    ungroup() %>% 
    group_by(CellType, region) %>% 
    mutate(label_col = delta/max(abs(delta))) %>% 
    ungroup()
  
  print(ggplot(tmp_plot, aes(N, delta, fill = label_col)) + geom_col() + theme_bw() + facet_grid(CellType~region, scales = 'free') + theme_bw() + scale_fill_gradient2(low = 'royalblue', high = 'firebrick', mid = 'white', midpoint = 0) + xlab('patient ranking'))
  
  tmp_plot <- tmp_plot %>% 
    mutate(ID = paste(CellType, region, sep = '_')) %>% 
    select(couple_id, ID, delta) %>% 
    spread(ID, delta, fill = 0)
  
  tmp_matrix <- tmp_plot %>% select(-couple_id) %>% as.matrix()
  rownames(tmp_matrix) <- tmp_plot$couple_id
  
  pheatmap(tmp_matrix)
  
  norm_matrix <- scale(tmp_matrix)
  norm_matrix[is.na(norm_matrix)] <- 0
  pheatmap(norm_matrix)
  
  if(i == 'first_level'){
    tmp_plot <- df_landscape %>% 
      filter(CellType %in% c('MF', 'MG')) %>% 
      select(patient_id, survival_group, region, CellType, P) %>% 
      spread(CellType, P, fill = 0) %>% 
      mutate(survival_group = factor(survival_group, levels = c('STS', 'eLTS')))
    
    
    print(ggplot(tmp_plot, aes(MF, MG, color = survival_group)) + 
            geom_point() + 
            theme_bw() + 
            facet_wrap(~region, scales = 'free') + 
            geom_smooth(method = 'lm') + 
            stat_cor() + 
            scale_color_manual(values = c('royalblue', 'goldenrod')) + 
            xlab('Relative Macrophage Percentage (%)') + 
            ylab('Relative Microglia Percentage (%)'))
    
    tmp_plot <- df_landscape %>% 
      filter(CellType %in% c('MF', 'MG')) %>% 
      select(patient_id, survival_group, region, CellType, P) %>% 
      spread(CellType, P, fill = 0) %>% 
      mutate(ratio = log2(MF/MG)) %>% 
      group_by(region) %>% 
      mutate(tmp_rank = rank(ratio)) %>% 
      mutate(survival_group = factor(survival_group, levels = c('STS', 'eLTS')))
    
    print(ggplot(tmp_plot, aes(tmp_rank, ratio, fill = survival_group)) +
            geom_col() + 
            theme_bw() + 
            facet_wrap(~region, scales = 'free', nrow = 1) + 
            scale_fill_manual(values = c('royalblue', 'goldenrod')) + 
            xlab('Patient ranking') + 
            ylab('Macrophages Microglia log2 ratio') + 
            theme(legend.position = 'bottom')) 
  }
  
  if(i %in% c('first_level', 'gbm_neftel')){
    
    df_landscape <- level_data %>% 
      group_by(survival_group, couple_id, patient_id, treated, number_resections, slide_id, scene_id, region, CellType) %>% 
      summarise(N = n()) %>% 
      ungroup() %>% 
      spread(CellType, N, fill = 0) %>% 
      gather(CellType, N, -survival_group, -couple_id, -patient_id, -treated, -number_resections, -slide_id, -scene_id, -region) %>% 
      group_by(survival_group, patient_id, treated, number_resections, slide_id, scene_id, couple_id, region) %>% 
      mutate(M = sum(N)) %>% 
      ungroup() %>% 
      mutate(P = 100*N/M)
    
    tmp_table <- df_landscape %>% 
      group_by(survival_group, patient_id, couple_id, treated, number_resections, region, CellType) %>% 
      summarise(average_fraction = mean(P), sd_fraction = sd(P)) %>% 
      ungroup()
    
    DT::datatable(tmp_table, filter = 'top')
    
    for(j in unique(tmp_table$region)){
      print(ggplot(tmp_table %>% filter(region == j), aes(paste0('R', number_resections), average_fraction, fill = CellType)) + 
              geom_col() + geom_errorbar(aes(ymin = average_fraction-sd_fraction, ymax = average_fraction + sd_fraction)) +
              theme_bw() + 
              facet_grid(CellType~survival_group+couple_id+patient_id+treated, scales = 'free', space = 'free_x', switch = 'y') +
              theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
              theme(legend.position = 'none') + 
              ggtitle(paste(i, j)) +
              xlab('Resection ID') + ylab('Average Cell Percentage (%)') +
              theme(strip.text.y.left = element_text(angle = 0)))
    }
  }
}
## [1] "first_level"

## [1] "Th_subtypes"

## [1] "Microglia_subtypes"

## [1] "gbm_neftel"

## [1] "Tcy_activation"

## [1] "Tcy_subtypes"

## [1] "Mf_subtypes"

## [1] "gbm_stem"

## [1] "DC_subtypes"